/*************************************************
author: RuanShengQiang
date: 2017/3/24
**************************************************/
// GLSL-style type spellings mapped onto the OpenCL vector types.
#define vec2 float2
#define vec3 float3
#define vec4 float4
// Component selectors: the tokens rgb / rgba expand to xyz / xyzw.
#define rgb xyz
#define rgba xyzw
#define PI 3.141592653589f

// Sampler used by INPUT(): normalized coordinates, clamp-to-edge addressing,
// linear filtering.
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;

// Scales the lookup coordinate by amp: uv is shifted by 0.5, multiplied by
// amp, and each component is reduced to its fmod-by-1 remainder.
// NOTE(review): fmod keeps the sign of its first argument, so negative
// inputs give negative results.
static vec2 scale(vec2 uv, float amp)
{
	const vec2 shifted = uv + (vec2)(0.5f);
	const vec2 magnified = shifted * (amp);
	return fmod(magnified, 1.0f);
}
// Rotates the offset (uv - center) by theta using a 2x2 rotation matrix.
// The returned value is the rotated offset itself; center is NOT added back.
static vec2 _rotate(vec2 uv, vec2 center, float theta)
{
	const float c = cos(theta);
	const float s = sin(theta);
	const vec2 offset = uv - center;

	vec2 rotated;
	rotated.x = dot((vec2)(c, -s), offset); // first matrix row
	rotated.y = dot((vec2)(s, c), offset);  // second matrix row
	return rotated;
}

// Pulls uv towards center by dividing its offset by
// (strength*r*r + strength*r + 1), then reduces the result with
// fmod by 1/amp.
//   uv, center : coordinate being warped and the point it is pulled towards
//   strength   : weight of the r-dependent divisor terms
//   r          : radius fed into the divisor (supplied by the caller)
//   amp        : the result is taken modulo 1/amp
static vec2 warp(vec2 uv, vec2 center, float strength, float r, float amp)
{
	const float divisor = strength*r*r + strength*r + 1.0f;
	const vec2 pulled = center + (uv - center) / divisor;
	return fmod(pulled, 1.0f / amp);
}
// Floored modulo applied per component: x - y * floor(x / y).
// Unlike fmod, the remainder is built from floor(), not truncation.
static vec2 myMod(vec2 x, float y)
{
	const vec2 wholeSteps = floor(x / y);
	return x - y * wholeSteps;
}

// Mirror-repeat wrap of uv, per component: the fractional part is kept as is
// when floor(uv) is even and reflected when floor(uv) is odd.
static vec2 mirror(vec2 uv)
{
	// 0 when floor(uv) is even, 1 when it is odd.
	const vec2 oddTile = myMod(floor(uv), 2.0f);
	// +1 when floor(uv) is even, -1 when it is odd.
	const vec2 direction = -2.0f*oddTile + 1.0f;
	const vec2 fraction = myMod(uv, 1.0f);
	return myMod(2.0f*oddTile + direction*fraction, 1.0f);
}

// Samples src_data at tc after remapping it through param->origROI:
// tc is multiplied by origROI[2..3], offset by origROI[0..1], and the y
// coordinate is flipped (1 - y) before the read.
// NOTE(review): presumably origROI holds a normalized x, y, width, height
// rectangle - confirm against the FilterParam definition.
static vec4 INPUT(image2d_t src_data, __global FilterParam* param, vec2 tc)
{
	const vec2 roiScale = (vec2)(param->origROI[2], param->origROI[3]);
	const vec2 roiOffset = (vec2)(param->origROI[0], param->origROI[1]);
	const vec2 mapped = tc*roiScale + roiOffset;
	const vec2 flipped = (vec2)(mapped.x, 1.0f - mapped.y);
	return read_imagef(src_data, sampler, flipped);
}

// Deterministic hash of co: the fractional part of
// sin(dot(co, (12.9898, 78.233))) * 43758.5453.
static float normalizedRandom(vec2 co)
{
	float wholePart; // fract() requires an output pointer; the value is unused
	const float hashed = sin(dot(co.xy ,(vec2)(12.9898f,78.233f))) * 43758.5453f;
	return fract(hashed, &wholePart);
}

// Directional blur: accumulates Samples taps of the input along dir.
//   input, param : image and filter parameters forwarded to INPUT()
//   uv           : coordinate of the first tap
//   dir          : blur direction; tap i sits at uv + i * dir*processMB/Samples
//   Samples      : number of taps to accumulate
//   processMB    : blur length multiplier applied to dir
//   rollDir      : constant offset added to every tap
// Every tap coordinate is wrapped with mirror() before sampling.
// Returns the un-normalized sum of the taps on top of the initial value
// (0, 0, 0, 1); no division by Samples is performed here.
// NOTE(review): the accumulator starts with alpha = 1, so the returned alpha
// is 1 + the sum of the tap alphas - confirm this is intended.
static float4 blur(image2d_t input, __global FilterParam* param, vec2 uv, vec2 dir, int Samples, float processMB, vec2 rollDir)
{
	const float2 tapStep = dir*processMB/(float)(Samples);
	vec4 color = (vec4)(0.0f,0.0f,0.0f,1.0f);

	// One tap per iteration, so exactly Samples taps are taken. The previous
	// pair-wise loop took Samples + 1 taps whenever Samples was odd; for even
	// counts the taps and their accumulation order are unchanged.
	for (int i = 0; i < Samples; ++i)
	{
		const vec2 tap = mirror( uv + (float)(i) * tapStep +rollDir );
		color += INPUT(input, param, tap);
	}
	return color;
}

// Two-phase transition kernel; each work-item writes one pixel of dstImg.
//   input1 : image sampled while progress <  0.2
//   input2 : image sampled while progress >= 0.2
//   dstImg : destination; the pixel is written at (x, param->height[2] - y - 1)
//   param  : supplies cur_time / total_time (progress) and height[2]; it is
//            also forwarded to get_global_id0/1 and globalBlur
// In both phases the pixel's offset from the image centre is divided by a
// time-dependent factor, the result is passed to globalBlur() as the sample
// position and blur direction, and the returned value is divided by Samples.
// globalBlur, get_global_id0/1 and FilterParam are defined outside this view.
__kernel void MAIN(__read_only image2d_t input1, __read_only image2d_t input2, __write_only image2d_t dstImg,__global FilterParam* param)
{
	const int Samples = 16;//multiple of 2
	const float rotaDegree = 0.0f;  // 0 makes both rotation angles zero
	const float processWarp = 0.0f; // 0 makes the warp divisor pow(1, r) == 1

	// NOTE(review): no guard against total_time == 0 (progress would be
	// NaN/inf) - confirm the host never passes that.
	const float progress = param->cur_time / param->total_time;

	const int W = get_global_size(0);
	const int H = get_global_size(1);
	const int textH = param->height[2];
	const int w = get_global_id(0);
	const int h = get_global_id(1);
	const float2 resolution = (float2)(W,H);

	const vec2 fragCoord = (vec2)(get_global_id0( param), get_global_id1( param));
	const vec2 center = (vec2)(0.5f);
	// Offset of this pixel from the image centre in normalized coordinates.
	vec2 dir = (fragCoord.xy) / resolution.xy - center;

	vec4 color;
	if (progress<0.2f)
	{
		// 7.853992 ~= 2.5*PI, so the sine argument reaches PI/2 at progress 0.2.
		const float processScal = 11.0f*pow(sin(7.853992f*(progress)),3.5f); // scale amp
		const float processMB = processScal*0.2f;// the blur trend is same as scale
		const float processRota = PI/90.0f*rotaDegree*progress;
		const float r = length(dir); // radius taken before the rotation

		dir = _rotate(dir, (vec2)(0.0f), processRota);
		const vec2 scaleDir = dir/(1.0f+1.0f*processScal);//scaling compensation
		const vec2 uv = center + scaleDir/(pow(1.0f+20.0f*processWarp,r));//warp
		color = globalBlur(input1, param, uv, scaleDir,Samples, processMB,(vec2)(0.0f), resolution);
	}
	else
	{
		const float processScalR = 1.0f-0.8f*exp(-8.0f*(progress-0.2f));
		const float processMBR = 0.5f*(progress-1.0f)*(progress-1.0f);
		const float processRotaR = PI/90.0f*rotaDegree*(1.0f-progress);

		dir = _rotate(dir, (vec2)(0.0f), processRotaR);
		// The sample position uses the unrotated offset; only the blur
		// direction uses the rotated one.
		const vec2 uv = center+(fragCoord.xy/resolution.xy - center)/processScalR;//scaling compensation
		const vec2 scaleDir = dir/processScalR;
		color = globalBlur(input2, param, uv, scaleDir,Samples, processMBR,(vec2)(0.0f), resolution);
	}

	// The write is vertically flipped against param->height[2]; the value
	// returned by globalBlur is divided by Samples.
	write_imagef(dstImg, (int2)(w, textH - h -1), color/(float)(Samples));
}